## Literature analysis -----

# load packages
source("packages.r")

## data import and preparation ------------------------------

vaa_df <- read_xlsx(path = "../data/vaa-studies.xlsx")


## first descriptives -----------------------

# number of effects
nrow(vaa_df)

# number of studies
length(unique(vaa_df$bibkey))


## clean variables --------------------------

# merge vote change / vote choice categories
vaa_df$depvar_coarse[vaa_df$depvar_coarse == "vote change"] <- "vote choice"
vaa_df$depvar_coarse %>% str_to_title %>% table %>% sort(decreasing = TRUE)

# make numeric
vaa_df$sample_size <- as.numeric(vaa_df$sample_size)
vaa_df$effect_size <- as.numeric(vaa_df$effect_size)
vaa_df$effect_se <- as.numeric(vaa_df$effect_se)

# set label for undefined effect size types
vaa_df$effect_size_type[is.na(vaa_df$effect_size_type) ] <- "NOT DEFINED YET"

# design of study
vaa_df$study_design <- NA
vaa_df$study_design <- ifelse(vaa_df$experimental == "yes", "Experimental", vaa_df$study_design)
vaa_df$study_design <- ifelse(vaa_df$experimental == "no" & str_detect(vaa_df$modeling, "IV|selection|matching"), "Observational - selection models", vaa_df$study_design)
vaa_df$study_design <- ifelse(vaa_df$experimental == "no" & !str_detect(vaa_df$modeling, "IV|selection|matching") & vaa_df$before_after_measure == "yes", "Observational - panel", vaa_df$study_design)
vaa_df$study_design <- ifelse(vaa_df$experimental == "no" & !str_detect(vaa_df$modeling, "IV|selection|matching") & vaa_df$before_after_measure == "no", "Observational - no panel", vaa_df$study_design)
tabyl(vaa_df$study_design)

# country
vaa_df$country_coarse <- vaa_df$country %>% str_replace_all(fixed(" - EU"), "")


# study and election labels
vaa_df$study_label <- paste(vaa_df$author_label, vaa_df$year, sep = ", ")
vaa_df$election <- paste(vaa_df$country, vaa_df$election_year, sep = " ")


## effect standardizations ---------------

## overview of effect size types by outcome

tabyl(vaa_df$depvar_coarse)
tabyl(vaa_df, depvar_coarse, depvar_measure)
tabyl(vaa_df$effect_size_type)
tabyl(vaa_df, depvar_coarse, effect_size_type)


## turn effects in binary outcomes models into log odds

## coefficients
vaa_df$log_odds <- NA
vaa_df$log_odds_se <- NA

# log odds --> log odds
vaa_df$log_odds[vaa_df$effect_size_type == "log odds"] <- vaa_df$effect_size[vaa_df$effect_size_type == "log odds"] 
vaa_df$log_odds_se[vaa_df$effect_size_type == "log odds"] <- vaa_df$effect_se[vaa_df$effect_size_type == "log odds"]

# probit --> log odds
vaa_df$log_odds[vaa_df$effect_size_type == "probit coef"] <- vaa_df$effect_size[vaa_df$effect_size_type == "probit coef"] * 1.6 # see Gelman: https://statmodeling.stat.columbia.edu/2006/06/06/take_logit_coef/ 
vaa_df$log_odds_se[vaa_df$effect_size_type == "probit coef"] <- vaa_df$effect_se[vaa_df$effect_size_type == "probit coef"] * 1.6 # see Gelman: https://statmodeling.stat.columbia.edu/2006/06/06/take_logit_coef/ 

# unstandardized regression coefficient --> log odds
vaa_df$log_odds[vaa_df$effect_size_type == "linear coef" & vaa_df$depvar_measure == "0-1"] <- vaa_df$effect_size[vaa_df$effect_size_type == "linear coef" & vaa_df$depvar_measure == "0-1"] * 4 # see Gelman: https://www.gettinggeneticsdone.com/2010/12/using-divide-by-4-rule-to-interpret.html
vaa_df$log_odds_se[vaa_df$effect_size_type == "linear coef" & vaa_df$depvar_measure == "0-1"] <- vaa_df$effect_se[vaa_df$effect_size_type == "linear coef" & vaa_df$depvar_measure == "0-1"] * 4 # see Gelman: https://www.gettinggeneticsdone.com/2010/12/using-divide-by-4-rule-to-interpret.html

# percentage point difference --> log odds
vaa_df$log_odds[vaa_df$effect_size_type == "percentage point difference"] 

# reconstruct 2x2 table
vaa_df$tpos <-  as.numeric(vaa_df$vaa_usage_sample) * (vaa_df$percent_outcome_treatment_group/100)
vaa_df$tneg <-  as.numeric(vaa_df$vaa_usage_sample) * (1-(vaa_df$percent_outcome_treatment_group/100))
vaa_df$cpos <-  (vaa_df$sample_size - as.numeric(vaa_df$vaa_usage_sample)) * (vaa_df$percent_outcome_control_group/100)
vaa_df$cneg <-  (vaa_df$sample_size - as.numeric(vaa_df$vaa_usage_sample)) * (1-(vaa_df$percent_outcome_control_group/100))

# calculate log OR with escalc
out_df <- escalc(measure = "OR", ai = tpos, bi = tneg, ci = cpos, di = cneg, data = vaa_df[vaa_df$effect_size_type == "percentage point difference",])
tabyl(vaa_df$effect_size_type)

vaa_df$log_odds[vaa_df$effect_size_type == "percentage point difference"] <- out_df$yi
vaa_df$log_odds_se[vaa_df$effect_size_type == "percentage point difference"] <- sqrt(out_df$vi)


## turn effects in continuous outcomes models (political knowledge) into partial correlations
vaa_df$pcor <- NA
vaa_df$pcor_se <- NA
vaa_df$t_stat <- vaa_df$effect_size / vaa_df$effect_se

out_df <- escalc(measure = "PCOR", # partial correlation
                 ti = t_stat, # test statistics (t-tests)
                 ni = sample_size, # study sample size
                 mi = n_predictors,  # number of predictors in model
                 data = vaa_df
)

vaa_df$pcor[!is.na(vaa_df$n_predictors)] <- out_df$yi[!is.na(vaa_df$n_predictors)]
vaa_df$pcor_se[!is.na(vaa_df$n_predictors)] <- sqrt(out_df$vi[!is.na(vaa_df$n_predictors)])


# outcome-specific effect directions
vaa_df$effect_direction_quant <- ifelse(vaa_df$effect_direction == "positive", 1, -1)
vaa_df$effect_direction_quant <- ifelse(vaa_df$effect_significance_05 == "yes", vaa_df$effect_direction_quant, 0)

vaa_df$effect_direction_vote[vaa_df$depvar_coarse == "vote choice"] <- vaa_df$effect_direction_quant[vaa_df$depvar_coarse == "vote choice"]
vaa_df$effect_direction_turnout[vaa_df$depvar_coarse == "turnout"] <- vaa_df$effect_direction_quant[vaa_df$depvar_coarse == "turnout"]
vaa_df$effect_direction_knowledge[vaa_df$depvar_coarse == "political knowledge"] <- vaa_df$effect_direction_quant[vaa_df$depvar_coarse == "political knowledge"]
vaa_df$effect_direction_infoseeking[vaa_df$depvar_coarse == "information seeking"] <- vaa_df$effect_direction_quant[vaa_df$depvar_coarse == "information seeking"]

## create study-level dataset ---------------
study_design_levels <- rev(c("Experimental", "Observational - selection models", "Observational - panel", "Observational - no panel"))

vaa_studies_df <- vaa_df %>% group_by(bibkey) %>% summarize(year = first(year),
                                                                journal = first(journal),
                                                                title = first(title),
                                                                n_countries = n_distinct(country),
                                                                countries = paste0(unique(country_coarse), collapse = ", "),
                                                                elections = paste0(unique(election), collapse = ", "),
                                                                data_collection_mode = paste0(unique(data_collection_mode), collapse = ","),
                                                                before_after_measure = paste0(unique(before_after_measure), collapse = ","),
                                                                experimental = paste0(unique(experimental), collapse = ","),
                                                                n_effects = n(),
                                                                study_design = paste0(unique(study_design), collapse = ","),
                                                                depvar_turnout = "turnout" %in% depvar_coarse,
                                                                depvar_vote_change = "vote choice" %in% depvar_coarse,
                                                                depvar_political_knowledge = "political knowledge" %in% depvar_coarse,
                                                                depvar_information_seeking = "information seeking" %in% depvar_coarse,
                                                                effect_direction_vote = mean(effect_direction_vote, na.rm = TRUE),
                                                                effect_direction_turnout = mean(effect_direction_turnout, na.rm = TRUE),
                                                                effect_direction_knowledge = mean(effect_direction_knowledge, na.rm = TRUE),
                                                                effect_direction_infoseeking = mean(effect_direction_infoseeking, na.rm = TRUE))

vaa_studies_df$effect_direction_vote_tendency <- ifelse(vaa_studies_df$effect_direction_vote > .5, "+", 
                                                        ifelse(vaa_studies_df$effect_direction_vote < -.5, "-",
                                                               ifelse(!is.nan(vaa_studies_df$effect_direction_vote), "0", "")))
vaa_studies_df$effect_direction_turnout_tendency <- ifelse(vaa_studies_df$effect_direction_turnout > .5, "+", 
                                                        ifelse(vaa_studies_df$effect_direction_turnout < -.5, "-",
                                                               ifelse(!is.nan(vaa_studies_df$effect_direction_turnout), "0", "")))
vaa_studies_df$effect_direction_knowledge_tendency <- ifelse(vaa_studies_df$effect_direction_knowledge > .5, "+", 
                                                        ifelse(vaa_studies_df$effect_direction_knowledge < -.5, "-",
                                                               ifelse(!is.nan(vaa_studies_df$effect_direction_knowledge), "0", "")))
vaa_studies_df$effect_direction_infoseeking_tendency <- ifelse(vaa_studies_df$effect_direction_infoseeking > .5, "+", 
                                                        ifelse(vaa_studies_df$effect_direction_infoseeking < -.5, "-",
                                                               ifelse(!is.nan(vaa_studies_df$effect_direction_infoseeking), "0", "")))

vaa_studies_print <- dplyr::select(vaa_studies_df, bibkey, effect_direction_turnout_tendency, effect_direction_vote_tendency, effect_direction_knowledge_tendency, effect_direction_infoseeking_tendency, study_design)
vaa_studies_print$study_design[vaa_studies_print$bibkey == "Garzia2017"] <- "Observational - no panel"
vaa_studies_print <- arrange(vaa_studies_print, factor(study_design, levels = study_design_levels))
study_design_count <- table(factor(vaa_studies_print$study_design, levels = study_design_levels))
study_design_count

# export lit table to latex
vaa_studies_latex <- dplyr::select(vaa_studies_print, -study_design)
vaa_studies_latex$bibkey <- paste0("\\cite{", vaa_studies_latex$bibkey, "}")
colnames(vaa_studies_latex) <- c("\\multicolumn{1}{l}{Article}", "Turnout", "Vote choice", "Political knowledge", "Information seeking")

addtorow <- list()
addtorow$pos <- list(-1, 0, 5, 14, 20, nrow(vaa_studies_latex))
addtorow$command <- c("\\toprule \n & \\multicolumn{4}{c}{Effect tendency for outcome}\\\\\n\\cmidrule{2-5}",
                      "\\midrule \n \\multicolumn{1}{l}{\\textit{Observational - no panel}} & & &  & \\\\\n",
                      "\\midrule \n \\multicolumn{1}{l}{\\textit{Observational - panel}} & & & &  \\\\\n",
                      "\\midrule \n \\multicolumn{1}{l}{\\textit{Observational - selection models}} & & & &  \\\\\n",
                      "\\midrule \n \\multicolumn{1}{l}{\\textit{Experimental}} & & & &  \\\\\n",
                      "\\bottomrule \n \\multicolumn{5}{p{16cm}}{\\scriptsize \\textit{Note:} Reported effect tendencies indicate positive (+) or negative (-) effects significant at the 5\\% level and null effects (0). Studies were classified as follows: (1) \\emph{Observational - no panel} when VAA usage is not manipulated and the outcome is measured in a singular observation (post VAA usage) without a pre-VAA baseline, (2) \\emph{Observational - panel} when the outcome is measured before and after VAA use allowing to identify changes, (3) \\emph{Observational - selection models} when authors estimate the effect through a two-equation structure, where selection into VAA use is explicitly modeled, and (4) \\emph{Experimental} when VAA usage is randomly manipulated.}")

print(xtable(vaa_studies_latex, align = c("r","r", "x{1.8cm}", "x{1.8cm}", "x{1.8cm}", "x{2cm}"), digits = 0, caption = "Overview of VAA effects studies by study design and outcome of interest.\\label{tab:vaameta}"), booktabs = TRUE, size = "footnotesize", caption.placement = "top", table.placement = "t!hb",  hline.after = NULL, add.to.row = addtorow, include.rownames=FALSE, sanitize.text.function = function(x) {x}, file = "../output/tab-vaa-meta.tex")




## make graph to illustrate effects by outcome ---------------

vaa_turnout <- filter(vaa_df, depvar_coarse == "turnout")
turnout_effects <- vaa_turnout %>% group_by(effect_direction, effect_significance_05) %>% summarize(num = n())
turnout_effects$outcome <- "Turnout"

vaa_votechoice <- filter(vaa_df, depvar_coarse == "vote choice" | depvar_coarse ==  "vote change")
votechoice_effects <- vaa_votechoice %>% group_by(effect_direction, effect_significance_05) %>% summarize(num = n())
votechoice_effects$outcome <- "Vote choice"

vaa_knowledge <- filter(vaa_df, depvar_coarse == "political knowledge")
knowledge_effects <- vaa_knowledge %>% group_by(effect_direction, effect_significance_05) %>% summarize(num = n())
knowledge_effects$outcome <- "Political knowledge"

vaa_infoseeking <- filter(vaa_df, depvar_coarse == "information seeking")
infoseeking_effects <- vaa_infoseeking %>% group_by(effect_direction, effect_significance_05) %>% summarize(num = n())
infoseeking_effects$outcome <- "Information seeking"

vaa_effects <- rbind(turnout_effects, votechoice_effects, knowledge_effects, infoseeking_effects)
vaa_effects$num[vaa_effects$effect_direction == "negative"] <- - vaa_effects$num[vaa_effects$effect_direction == "negative"]

vaa_effects$cats <- interaction(vaa_effects$effect_direction, vaa_effects$effect_significance_05) 
vaa_effects$cats <- factor(vaa_effects$cats, levels = c("negative.yes", "negative.no", "positive.yes", "positive.no"))
vaa_effects$outcome <- factor(vaa_effects$outcome, levels = rev(sort(table(vaa_df$depvar_coarse), decreasing = TRUE)) %>% names %>% firstup)


## plot effects by outcome
p <- ggplot() + 
  geom_bar(data = vaa_effects, aes(x = outcome, y = num, fill = cats), stat = "identity", position = position_stack()) +    coord_flip() + 
  guides(fill = guide_legend(title = "Effects")) + 
  scale_fill_manual(labels = c("negative, significant", "negative, insignificant", "positive, significant", "positive, insignificant"), values=c("#e66101", "#fdb863", "#5e3c99", "#b2abd2")) + 
  scale_y_continuous(limits = c(-5, 30), breaks = c(-5, seq(0, 30, 10)), labels = c("5", seq(0, 30, 10))) + 
  geom_hline(yintercept = 0, size = 1) + 
  theme_ipsum_rc(axis_title_just = "c") + 
  xlab("") + ylab("Number of effects") + 
  theme(axis.title.x = element_text(size = 15),
        axis.title.y = element_text(size = 15),
        legend.position = c(.85, .25),
        legend.box = "horizontal",
        legend.background = element_rect(fill = "white", size = 0),
        plot.margin=unit(c(0.1,0.1,0.1,0.1), "cm"))
p
ggsave(p, file = "../output/vaa_literatureeffects.png", width = 5, height = 2, units = "cm", dpi = 300, scale = 5)

  

